/*******************************************************************************
Author: Jason Robey
Date: 05/27/2025

Purpose: Defining a program that transforms aggregated data from single-years, 
single-ages to measures for five-year cohorts. Seven different approaches to 
collapsing the single-year data are used. Each approach employs a different 
strategy for collapsing the properties of a birth cohort based on age-period 
data. The seven strategies are defined in the manuscript. 

Before utilizing this program, we strongly encourage reading the "_README.txt" 
file for further details on the program, how to apply it, the output it 
produces, and its limitations.
*******************************************************************************/

* -----------------------------------------------------------------------------
* cohort_measures
*
* Syntax:   cohort_measures year age "var1 [var2 ...]"
*
* Arguments (positional, read with -args-):
*   year   name of the single-calendar-year variable
*   age    name of the single-year-of-age variable
*   vars   name(s) of the measure variable(s) to collapse; -args- assigns one
*          token per name, so several variables must be passed as one
*          double-quoted list
*
* Effect: REPLACES the data in memory with one row per five-year age group
* (`age'_cat) and five-year period (`year'_cat). Within each such 5x5 block,
* a cell is identified by its year offset y (0-4) and age offset a (0-4).
* For every variable in `vars' the following are created:
*   <var>_y#_a#     the single-year, single-age cell value
*   <var>_pc        sum of all 25 cells of the block
*   <var>_y0-_y4    sum over the age offsets for one year offset
*   <var>_rc_h1     sum of cells with y >= a   (15 cells)
*   <var>_rc_h2     sum of cells with y <= a   (15 cells)
*   <var>_rca1_h1   sum of cells with y >  a   (10 cells)
*   <var>_rca1_h2   sum of cells with y <  a   (10 cells)
*   <var>_rca2_h2   same cells as <var>_rca1_h2
*   <var>_*_lead    the same quantity taken from the next row of the same age
*                   group when rows are ordered by period
*   <var>_pc_both   <var>_pc + <var>_pc_lead
*   <var>_rc        <var>_rc_h1   + <var>_rc_h2_lead
*   <var>_rca1      <var>_rca1_h1 + <var>_rca1_h2_lead
*   <var>_rca2      <var>_rc_h1   + <var>_rca2_h2_lead
*   <var>_pc_my     <var>_y3 + <var>_y4 + <var>_y0_lead + <var>_y1_lead
* plus birth_year_cat (`year'_cat - `age'_cat) and `year'_cat_lead.
*
* NOTE(review): the measures are summed, so they are presumably counts;
* confirm against the README before passing rates or means.
* NOTE(review): the "_lead" values are simply the next row within an age
* group; they refer to the following five-year period only if the periods in
* the data are contiguous. `year'_cat_lead is provided so this can be checked.
* -----------------------------------------------------------------------------
capture program drop cohort_measures
program define cohort_measures
	args year age vars

	keep `year' `age' `vars'

	* Five-year age groups (coded by their lower bound) and the offset (0-4)
	* of each single age within its group. Ages above 100 fall in the open
	* "100+" group, so their offset can exceed 4.
	recode `age' (0/4=0 "0-4") (5/9=5 "5-9") (10/14=10 "10-14") ///
		(15/19=15 "15-19") (20/24=20 "20-24") (25/29=25 "25-29") ///
		(30/34=30 "30-34") (35/39=35 "35-39") (40/44=40 "40-44") (45/49=45 "45-49") ///
		(50/54=50 "50-54") (55/59=55 "55-59") (60/64=60 "60-64") (65/69=65 "65-69") ///
		(70/74=70 "70-74") (75/79=75 "75-79") (80/84=80 "80-84") (85/89=85 "85-89") ///
		(90/94=90 "90-94") (95/99=95 "95-99") (100/max=100 "100+"), gen(`age'_cat)

	gen `age'2 = `age' - `age'_cat

	order `age'_cat `age'2, after(`age')

	* Five-year periods (coded by their first year) and the offset (0-4) of
	* each calendar year within its period. -recode- leaves values outside the
	* listed ranges unchanged, so years outside 1900-2024 are NOT grouped.
	#delimit ;
	recode `year' (1900/1904 = 1900 "1900-04") (1905/1909 = 1905 "1905-09")
		(1910/1914 = 1910 "1910-14") (1915/1919 = 1915 "1915-19")
		(1920/1924 = 1920 "1920-24") (1925/1929 = 1925 "1925-29")
		(1930/1934 = 1930 "1930-34") (1935/1939 = 1935 "1935-39")
		(1940/1944 = 1940 "1940-44") (1945/1949 = 1945 "1945-49")
		(1950/1954 = 1950 "1950-54") (1955/1959 = 1955 "1955-59")
		(1960/1964 = 1960 "1960-64") (1965/1969 = 1965 "1965-69")
		(1970/1974 = 1970 "1970-74") (1975/1979 = 1975 "1975-79")
		(1980/1984 = 1980 "1980-84") (1985/1989 = 1985 "1985-89")
		(1990/1994 = 1990 "1990-94") (1995/1999 = 1995 "1995-99")
		(2000/2004 = 2000 "2000-04") (2005/2009 = 2005 "2005-09")
		(2010/2014 = 2010 "2010-14") (2015/2019 = 2015 "2015-19")
		(2020/2024 = 2020 "2020-24")
		, gen(`year'_cat);
	#delimit cr

	gen `year'2 = `year' - `year'_cat

	order `year'_cat `year'2, after(`year')

	keep `age'* `year'_cat `year'2 `vars'

	* First reshape: spread the year offsets into columns (<var>0 ... <var>4).
	reshape wide `vars', i(`year'_cat `age' `age'2 `age'_cat) j(`year'2)

	* Rename only the reshaped measure columns to <var>_y#, collecting them
	* as the stub list for the second reshape. Looping offset-first keeps the
	* list in the column order produced by the reshape above. Naming the
	* columns explicitly (rather than with wildcards) leaves the age
	* variables untouched whatever they are called.
	local stubs
	forval x = 0/4 {
		foreach var in `vars' {
			rename `var'`x' `var'_y`x'
			local stubs `stubs' `var'_y`x'
		}
	}

	keep `age'_cat `age'2 `year'_cat `stubs'

	* Second reshape: spread the age offsets into columns (<var>_y##).
	reshape wide `stubs', i(`age'_cat `year'_cat) j(`age'2)

	* <var>_y<x><y>  ->  <var>_y<x>_a<y>
	foreach var in `vars' {
		forval x = 0/4 {
			forval y = 0/4 {
				rename `var'_y`x'`y' `var'_y`x'_a`y'
			}
		}
	}

	gen birth_year = `year'_cat - `age'_cat

	* Birth-year categories. Both inputs are multiples of five, so this
	* recode only attaches labels; birth years before 1900 keep their numeric
	* value but receive no label.
	#delimit ;
	recode birth_year
		(1900/1904 = 1900 "1900-04") (1905/1909 = 1905 "1905-09")
		(1910/1914 = 1910 "1910-14") (1915/1919 = 1915 "1915-19")
		(1920/1924 = 1920 "1920-24") (1925/1929 = 1925 "1925-29")
		(1930/1934 = 1930 "1930-34") (1935/1939 = 1935 "1935-39")
		(1940/1944 = 1940 "1940-44") (1945/1949 = 1945 "1945-49")
		(1950/1954 = 1950 "1950-54") (1955/1959 = 1955 "1955-59")
		(1960/1964 = 1960 "1960-64") (1965/1969 = 1965 "1965-69")
		(1970/1974 = 1970 "1970-74") (1975/1979 = 1975 "1975-79")
		(1980/1984 = 1980 "1980-84") (1985/1989 = 1985 "1985-89")
		(1990/1994 = 1990 "1990-94") (1995/1999 = 1995 "1995-99")
		(2000/2004 = 2000 "2000-04") (2005/2009 = 2005 "2005-09")
		(2010/2014 = 2010 "2010-14") (2015/2019 = 2015 "2015-19")
		(2020/2024 = 2020 "2020-24")
		, gen(birth_year_cat);
	#delimit cr

	drop birth_year
	order birth_year_cat, after(`age'_cat)

	* Block totals. rowtotal() treats missing cells as zero, so a block whose
	* cells are all missing sums to 0 here (see the recode further below).
	foreach var in `vars'{
		* whole 5x5 block
		egen double `var'_pc = rowtotal(`var'_y*_a*)
		* one year offset, all age offsets
		egen double `var'_y0 = rowtotal(`var'_y0_a*)
		egen double `var'_y1 = rowtotal(`var'_y1_a*)
		egen double `var'_y2 = rowtotal(`var'_y2_a*)
		egen double `var'_y3 = rowtotal(`var'_y3_a*)
		egen double `var'_y4 = rowtotal(`var'_y4_a*)
		* year offset >= age offset (triangle including the diagonal)
		egen double `var'_rc_h1 = rowtotal(`var'_y0_a0 `var'_y1_a0 `var'_y2_a0 ///
			`var'_y3_a0 `var'_y4_a0 ///
			`var'_y1_a1 `var'_y2_a1 `var'_y3_a1 `var'_y4_a1 ///
			`var'_y2_a2 `var'_y3_a2 `var'_y4_a2 ///
			`var'_y3_a3 `var'_y4_a3 ///
			`var'_y4_a4)
		* year offset <= age offset (triangle including the diagonal)
		egen double `var'_rc_h2 = rowtotal(`var'_y0_a4 `var'_y1_a4 `var'_y2_a4 ///
			`var'_y3_a4 `var'_y4_a4 ///
			`var'_y0_a3 `var'_y1_a3 `var'_y2_a3 `var'_y3_a3 ///
			`var'_y0_a2 `var'_y1_a2 `var'_y2_a2 ///
			`var'_y0_a1 `var'_y1_a1 ///
			`var'_y0_a0)
		* year offset > age offset (triangle without the diagonal)
		egen double `var'_rca1_h1 = rowtotal(`var'_y1_a0 `var'_y2_a0 ///
			`var'_y3_a0 `var'_y4_a0 ///
			`var'_y2_a1 `var'_y3_a1 `var'_y4_a1 ///
			`var'_y3_a2 `var'_y4_a2 ///
			`var'_y4_a3)
		* year offset < age offset (triangle without the diagonal)
		egen double `var'_rca1_h2 = rowtotal(`var'_y0_a4 `var'_y1_a4 `var'_y2_a4 ///
			`var'_y3_a4 ///
			`var'_y0_a3 `var'_y1_a3 `var'_y2_a3 ///
			`var'_y0_a2 `var'_y1_a2 ///
			`var'_y0_a1)
		* same cells as rca1_h2; kept under its own name for the rca2 measure
		egen double `var'_rca2_h2 = rowtotal(`var'_y0_a4 `var'_y1_a4 `var'_y2_a4 ///
			`var'_y3_a4 ///
			`var'_y0_a3 `var'_y1_a3 `var'_y2_a3 ///
			`var'_y0_a2 `var'_y1_a2 ///
			`var'_y0_a1)
	}

	* Values from the next row of the same age group, ordered by period.
	* The last period of each age group has no next row, so its lead is missing.
	foreach var in `vars' {
		foreach typ in pc y0 y1 rc_h2 rca1_h2 rca2_h2 {
			bysort `age'_cat (`year'_cat): gen double `var'_`typ'_lead = `var'_`typ'[_n+1]
		}
	}

	* Combine each block with the following period's block.
	foreach var in `vars' {
		* Zero totals are set to missing in the pc and rc* components
		* (the <var>_y# totals are not recoded).
		recode `var'_pc `var'_pc_lead `var'_rc* (0=.)
		* "+" returns missing when either operand is missing, so every
		* combined measure is missing wherever one of its parts is missing.
		gen double `var'_pc_both = `var'_pc + `var'_pc_lead
		gen double `var'_rc = `var'_rc_h1 + `var'_rc_h2_lead
		gen double `var'_rca1 = `var'_rca1_h1 + `var'_rca1_h2_lead
		gen double `var'_rca2 = `var'_rc_h1 + `var'_rca2_h2_lead
		gen double `var'_pc_my = `var'_y3 + `var'_y4 + `var'_y0_lead + `var'_y1_lead
	}

	* Period that the "_lead" values were taken from. Built from the caller's
	* variable names so the program works for any year/age variable names.
	bysort `age'_cat (`year'_cat): gen `year'_cat_lead = `year'_cat[_n+1]

end

